{
 "metadata": {
  "name": "",
  "signature": "sha256:dff3e12b7160018f22a5c30b60c2d94c682bb83a1b63ac2fa5c5764fd2d6f28d"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "<img src=http://continuum.io/media/img/continuum_analytics_logo.png align=\"right\" width=\"30%\">\n",
      "# Blaze - Symbolic Data Analysis\n",
      "\n",
      "We demonstrate Blaze's symbolic nature by comparing its use to Pandas."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import blaze as bz\n",
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "bz.__version__"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "'0.6.5'"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### On The Surface\n",
      "\n",
      "Pandas and Blaze feel similar"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df = pd.read_csv('/home/mrocklin/workspace/blaze/examples/data/iris.csv')\n",
      "df.head(10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>sepal_length</th>\n",
        "      <th>sepal_width</th>\n",
        "      <th>petal_length</th>\n",
        "      <th>petal_width</th>\n",
        "      <th>species</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 5.1</td>\n",
        "      <td> 3.5</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.0</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 4.7</td>\n",
        "      <td> 3.2</td>\n",
        "      <td> 1.3</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.6</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> 5.4</td>\n",
        "      <td> 3.9</td>\n",
        "      <td> 1.7</td>\n",
        "      <td> 0.4</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.3</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td> 4.4</td>\n",
        "      <td> 2.9</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.1</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "   sepal_length  sepal_width  petal_length  petal_width      species\n",
        "0           5.1          3.5           1.4          0.2  Iris-setosa\n",
        "1           4.9          3.0           1.4          0.2  Iris-setosa\n",
        "2           4.7          3.2           1.3          0.2  Iris-setosa\n",
        "3           4.6          3.1           1.5          0.2  Iris-setosa\n",
        "4           5.0          3.6           1.4          0.2  Iris-setosa\n",
        "5           5.4          3.9           1.7          0.4  Iris-setosa\n",
        "6           4.6          3.4           1.4          0.3  Iris-setosa\n",
        "7           5.0          3.4           1.5          0.2  Iris-setosa\n",
        "8           4.4          2.9           1.4          0.2  Iris-setosa\n",
        "9           4.9          3.1           1.5          0.1  Iris-setosa"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "t = bz.Table('/home/mrocklin/workspace/blaze/examples/data/iris.csv')\n",
      "t.head(10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>sepal_length</th>\n",
        "      <th>sepal_width</th>\n",
        "      <th>petal_length</th>\n",
        "      <th>petal_width</th>\n",
        "      <th>species</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 5.1</td>\n",
        "      <td> 3.5</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.0</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 4.7</td>\n",
        "      <td> 3.2</td>\n",
        "      <td> 1.3</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.6</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> 5.4</td>\n",
        "      <td> 3.9</td>\n",
        "      <td> 1.7</td>\n",
        "      <td> 0.4</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.3</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td> 4.4</td>\n",
        "      <td> 2.9</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.1</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 4,
       "text": [
        "   sepal_length  sepal_width  petal_length  petal_width      species\n",
        "0           5.1          3.5           1.4          0.2  Iris-setosa\n",
        "1           4.9          3.0           1.4          0.2  Iris-setosa\n",
        "2           4.7          3.2           1.3          0.2  Iris-setosa\n",
        "3           4.6          3.1           1.5          0.2  Iris-setosa\n",
        "4           5.0          3.6           1.4          0.2  Iris-setosa\n",
        "5           5.4          3.9           1.7          0.4  Iris-setosa\n",
        "6           4.6          3.4           1.4          0.3  Iris-setosa\n",
        "7           5.0          3.4           1.5          0.2  Iris-setosa\n",
        "8           4.4          2.9           1.4          0.2  Iris-setosa\n",
        "9           4.9          3.1           1.5          0.1  Iris-setosa"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "t.species.distinct()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>species</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>     Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Iris-versicolor</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>  Iris-virginica</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 5,
       "text": [
        "           species\n",
        "0      Iris-setosa\n",
        "1  Iris-versicolor\n",
        "2   Iris-virginica"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Both perform analytic queries\n",
      "\n",
      "For example, lets compute the minimum sepal length per species"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df.groupby('species').sepal_length.min()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "species\n",
        "Iris-setosa        4.3\n",
        "Iris-versicolor    4.9\n",
        "Iris-virginica     4.9\n",
        "Name: sepal_length, dtype: float64"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "bz.by(t.species, \n",
      "      smallest=t.sepal_length.min())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>species</th>\n",
        "      <th>smallest</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>  Iris-virginica</td>\n",
        "      <td> 4.9</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>     Iris-setosa</td>\n",
        "      <td> 4.3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> Iris-versicolor</td>\n",
        "      <td> 4.9</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "           species  smallest\n",
        "0   Iris-virginica       4.9\n",
        "1      Iris-setosa       4.3\n",
        "2  Iris-versicolor       4.9"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### But internally they're quite different\n",
      "\n",
      "As an example, lets see how they handle interaction with SQL databases"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### SQL Interaction"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Open up SQLAlchemy engine\n",
      "import sqlalchemy\n",
      "engine = sqlalchemy.create_engine('sqlite:////home/mrocklin/workspace/blaze/examples/data/iris.db')\n",
      "\n",
      "# Pull data from SQLite to Pandas\n",
      "df = pd.read_sql('SELECT * FROM iris', engine)\n",
      "df.head(10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>sepal_length</th>\n",
        "      <th>sepal_width</th>\n",
        "      <th>petal_length</th>\n",
        "      <th>petal_width</th>\n",
        "      <th>species</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 5.1</td>\n",
        "      <td> 3.5</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.0</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 4.7</td>\n",
        "      <td> 3.2</td>\n",
        "      <td> 1.3</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.6</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> 5.4</td>\n",
        "      <td> 3.9</td>\n",
        "      <td> 1.7</td>\n",
        "      <td> 0.4</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.3</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td> 4.4</td>\n",
        "      <td> 2.9</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.1</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 8,
       "text": [
        "   sepal_length  sepal_width  petal_length  petal_width      species\n",
        "0           5.1          3.5           1.4          0.2  Iris-setosa\n",
        "1           4.9          3.0           1.4          0.2  Iris-setosa\n",
        "2           4.7          3.2           1.3          0.2  Iris-setosa\n",
        "3           4.6          3.1           1.5          0.2  Iris-setosa\n",
        "4           5.0          3.6           1.4          0.2  Iris-setosa\n",
        "5           5.4          3.9           1.7          0.4  Iris-setosa\n",
        "6           4.6          3.4           1.4          0.3  Iris-setosa\n",
        "7           5.0          3.4           1.5          0.2  Iris-setosa\n",
        "8           4.4          2.9           1.4          0.2  Iris-setosa\n",
        "9           4.9          3.1           1.5          0.1  Iris-setosa"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Point Blaze to SQLite table\n",
      "t = bz.Table('sqlite:////home/mrocklin/workspace/blaze/examples/data/iris.db::iris')\n",
      "t.head(10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>sepal_length</th>\n",
        "      <th>sepal_width</th>\n",
        "      <th>petal_length</th>\n",
        "      <th>petal_width</th>\n",
        "      <th>species</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0 </th>\n",
        "      <td> 5.1</td>\n",
        "      <td> 3.5</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1 </th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.0</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2 </th>\n",
        "      <td> 4.7</td>\n",
        "      <td> 3.2</td>\n",
        "      <td> 1.3</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3 </th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4 </th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.6</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5 </th>\n",
        "      <td> 5.4</td>\n",
        "      <td> 3.9</td>\n",
        "      <td> 1.7</td>\n",
        "      <td> 0.4</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6 </th>\n",
        "      <td> 4.6</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.3</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7 </th>\n",
        "      <td> 5.0</td>\n",
        "      <td> 3.4</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8 </th>\n",
        "      <td> 4.4</td>\n",
        "      <td> 2.9</td>\n",
        "      <td> 1.4</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9 </th>\n",
        "      <td> 4.9</td>\n",
        "      <td> 3.1</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.1</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td> 5.4</td>\n",
        "      <td> 3.7</td>\n",
        "      <td> 1.5</td>\n",
        "      <td> 0.2</td>\n",
        "      <td> Iris-setosa</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 9,
       "text": [
        "    sepal_length  sepal_width  petal_length  petal_width      species\n",
        "0            5.1          3.5           1.4          0.2  Iris-setosa\n",
        "1            4.9          3.0           1.4          0.2  Iris-setosa\n",
        "2            4.7          3.2           1.3          0.2  Iris-setosa\n",
        "3            4.6          3.1           1.5          0.2  Iris-setosa\n",
        "4            5.0          3.6           1.4          0.2  Iris-setosa\n",
        "5            5.4          3.9           1.7          0.4  Iris-setosa\n",
        "6            4.6          3.4           1.4          0.3  Iris-setosa\n",
        "7            5.0          3.4           1.5          0.2  Iris-setosa\n",
        "8            4.4          2.9           1.4          0.2  Iris-setosa\n",
        "9            4.9          3.1           1.5          0.1  Iris-setosa\n",
        "..."
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### And again our analytic query"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df.groupby('species').sepal_length.min()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 10,
       "text": [
        "species\n",
        "Iris-setosa        4.3\n",
        "Iris-versicolor    4.9\n",
        "Iris-virginica     4.9\n",
        "Name: sepal_length, dtype: float64"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "bz.by(t.species, \n",
      "      smallest=t.sepal_length.min())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>species</th>\n",
        "      <th>smallest</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>     Iris-setosa</td>\n",
        "      <td> 4.3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Iris-versicolor</td>\n",
        "      <td> 4.9</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>  Iris-virginica</td>\n",
        "      <td> 4.9</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 11,
       "text": [
        "           species  smallest\n",
        "0      Iris-setosa       4.3\n",
        "1  Iris-versicolor       4.9\n",
        "2   Iris-virginica       4.9"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Externally things looks the same"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Internally very different things happened\n",
      "\n",
      "Pandas pulled data from the SQLite database into local memory, then used pandas algorithms on that data.\n",
      "\n",
      "1. Pandas $\\leftarrow_\\textrm{data}$ SQLite\n",
      "2. Pandas did the work\n",
      "\n",
      "Blaze generated SQL and passed that back to the database\n",
      "\n",
      "1. Blaze $\\leftarrow_\\textrm{metadata}$ SQLite\n",
      "2. Blaze $\\rightarrow_\\textrm{SQL}$ SQLite\n",
      "3. SQLite did the work"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Lets see the generated SQL\n",
      "\n",
      "Blaze speaks SQLAlchemy"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "expr = bz.by(t.species, \n",
      "             smallest=t.sepal_length.min())\n",
      "\n",
      "result = bz.compute(expr, {t: t.data.table})\n",
      "result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 12,
       "text": [
        "<sqlalchemy.sql.selectable.Select at 0x7f1c030aa3d0; Select object>"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "SQLAlchemy in turn speaks SQLite"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "SELECT iris.species, min(iris.sepal_length) AS smallest \n",
        "FROM iris GROUP BY iris.species\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "A more complex example in SQLAlchemy"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print bz.compute(t[t.sepal_length > 5].species.count_values(), \n",
      "                 {t: t.data.table})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "SELECT iris.species, count(iris.species) AS count \n",
        "FROM iris \n",
        "WHERE iris.sepal_length > :sepal_length_1 GROUP BY iris.species ORDER BY count DESC\n"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "What users actually experience"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "t[t.sepal_length > 5].species.count_values()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>species</th>\n",
        "      <th>count</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>  Iris-virginica</td>\n",
        "      <td> 49</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> Iris-versicolor</td>\n",
        "      <td> 47</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>     Iris-setosa</td>\n",
        "      <td> 22</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 15,
       "text": [
        "           species  count\n",
        "0   Iris-virginica     49\n",
        "1  Iris-versicolor     47\n",
        "2      Iris-setosa     22"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Main Points\n",
      "\n",
      "1.  Blaze feels like NumPy/Pandas\n",
      "2.  Today, Blaze doesn't perform computation\n",
      "3.  Instead, Blaze manages metadata and APIs"
     ]
    }
   ],
   "metadata": {}
  }
 ]
}